package com.primeton.fbsearch.ppt;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.poi.hslf.extractor.QuickButCruddyTextExtractor;

import com.primeton.fbsearch.framework.DocumentHandler;
import com.primeton.fbsearch.framework.DocumentHandlerException;

/**
 * {@link DocumentHandler} implementation for PowerPoint ({@code .ppt}) input.
 * <p>
 * The text of the presentation is pulled out with POI's
 * {@link QuickButCruddyTextExtractor} and stored in a single Lucene field
 * named {@code "body"}.
 */
public class POIPPTHandler implements DocumentHandler {

	/** Name of the Lucene field that receives the extracted text. */
	private static final String BODY_FIELD = "body";

	/**
	 * Builds a Lucene document from a PowerPoint stream.
	 *
	 * @param is stream positioned at the start of the presentation data
	 * @return a document whose {@code "body"} field holds the extracted text
	 *         (stored, tokenized, with term vector positions and offsets), or
	 *         {@code null} when the extractor yields no text
	 * @throws DocumentHandlerException if the text cannot be extracted
	 */
	public Document getDocument(InputStream is) throws DocumentHandlerException {
		String content = extractText(is);

		// Keep the historical contract: no text means no document.
		// The null check guards against an extractor that returns null instead of "".
		if (content == null || content.length() == 0) {
			return null;
		}

		Document doc = new Document();
		doc.add(new Field(BODY_FIELD, content, Field.Store.YES, Field.Index.TOKENIZED,
				Field.TermVector.WITH_POSITIONS_OFFSETS));
		return doc;
	}

	/**
	 * Runs the POI extractor over the stream and always releases the extractor,
	 * whether or not extraction succeeds.
	 */
	private String extractText(InputStream is) throws DocumentHandlerException {
		QuickButCruddyTextExtractor extractor = null;
		try {
			extractor = new QuickButCruddyTextExtractor(is);
			return extractor.getTextAsString();
		} catch (Exception e) {
			// The cause is chained, so the caller decides how to report it.
			throw new DocumentHandlerException(
					"Cannot extract text from a ppt document", e);
		} finally {
			if (extractor != null) {
				try {
					extractor.close();
				} catch (Exception ignored) {
					// Best effort: a failure while closing must not mask the
					// extraction result or the original exception.
				}
			}
		}
	}

	/**
	 * Manual smoke test: extracts one presentation and prints its body field.
	 *
	 * @param argv optional; {@code argv[0]} is the path of the {@code .ppt} file
	 *             to read. Without arguments the original sample path is used.
	 * @throws Exception if the file cannot be opened or extraction fails
	 */
	public static void main(String[] argv) throws Exception {
		String file = "D:\\OpenSource\\search_data\\普元EOS基础开发培训课程.ppt";
		if (argv != null && argv.length > 0) {
			file = argv[0];
		}

		InputStream is = new FileInputStream(new File(file));
		try {
			Document doc = new POIPPTHandler().getDocument(is);
			if (doc == null) {
				System.out.println("No text extracted from " + file);
			} else {
				System.out.println(doc.getField(BODY_FIELD));
			}
		} finally {
			// Closing a stream that is already closed has no effect.
			is.close();
		}
	}

}
